Salmon length at age model summary

DIASPARA WP 2.2

Author

Viktor Thunell

Published

January 26, 2026

Load libraries

Show code
# Load libraries, install if needed
pkgs <- c("tidyverse", "tidylog", "devtools","viridis","nls.multstart", "broom", "patchwork", "coda", "boot", "tidybayes","bayesplot", "nimble", "here", "purrr")

# remotes::install_github("nimble-dev/nimble", ref = "devel", subdir = "packages/nimble")

if(length(setdiff(pkgs,rownames(installed.packages()))) > 0){
    install.packages(setdiff(pkgs, rownames(installed.packages())), dependencies = T)
  }

invisible(lapply(pkgs, library, character.only = T))

options(ggplot2.continuous.colour = "viridis")
theme_set(theme_minimal())

# Set path
color_scheme_set("viridis")
home <- here::here()

Length at age

Read and define data

Show code
sallaa <- readRDS(file = "~/gitProjects/diasp-lht/data/data-for-2-2/salmon-laa_2025-12-21.RData") %>%
  rename(year = fi_year,
         site = sai_location,
         sea.age = sea_age_year,
         juv.age = juvenile_age_year,
         tot.age = tot_age_year,
         lat = fisa_y_4326,
         lon = fisa_x_4326) %>%
  #filter out tot.age = 0 individuals
  filter(!(sea.age == 0 & juv.age == 0)) %>%
  #filter out non-aged individuals (~90000 individuals)
  filter(!(is.na(sea.age) & is.na(juv.age))) %>%
  # removes smolt.age = 0:
  filter(!(age.type == "both" & juv.age == 0)) %>%
  mutate(hatch.year = year-tot.age) %>% # to filter data based o hatch year instead of year to predict growth pars before catch year
  filter(!is.na(juv.age), # removes age.type = "sea.only"
         !is.na(year),
         hatch.year > 1997)

data.biph <- sallaa %>%
  filter(!(is.na(sex) & age.type == "both")) %>% # removes 27 000 rows
  mutate(yday.may1 = if_else(is.na(date), 1, yday(date) - yday("2025-05-01")+1), 
         doy.dec = if_else(yday.may1 > 0, yday.may1, yday.may1+365)/365,
         tot.age.dec = if_else(yday.may1 > 0, tot.age+doy.dec, tot.age+doy.dec-1),
         smo.age = if_else(age.type == "both", juv.age, NA),
         sea.age = if_else(is.na(sea.age), 0, sea.age),
         g.year = trunc(tot.age.dec)+1, # + 1 to index age 0 individuals
         hatch.year.f = as.numeric(factor(hatch.year, levels = sort(unique(hatch.year)), labels = seq_along(sort(unique(hatch.year))))),
         year.f = as.integer(factor(year)),
         sex = as.integer(if_else(sex == "f", 1,2)),
         su = as.integer(factor(spat.unit))) %>%
  mutate(ind.id = row_number())

# "both" data (individuals caught at sea)
data.b <- data.biph %>%
  filter(!is.na(smo.age))

# "juve.age" data (immature individuals caught in freshwater)
data.j <- data.biph %>%
  filter(age.type == "juve.only") %>%
  rename(length_mm_j = length_mm)

Plot length at age data

Show code
data.biph %>%
  mutate(sex = ifelse(sex == 1, "f","m")) %>%
  ggplot(aes(tot.age, length_mm, color = factor(sex)), alpha = 0.3) +
  geom_point(size = 0.1) +
  facet_grid(age.type~sex) +
  scale_x_continuous(breaks = seq(0, 13, 1) ) +
  theme_minimal()  
mutate: converted 'sex' from integer to character (0 new NA)
Warning in fortify(data, ...): Arguments in `...` must be used.
✖ Problematic argument:
• alpha = 0.3
ℹ Did you misspell an argument name?

Show code
data.biph %>% 
  ggplot() +
  geom_density(aes(x = tot.age, fill = sex), alpha = 0.3) +
  xlim(0, 13) +

data.biph %>% 
  ggplot() + 
  geom_density(aes(x = length_mm, fill = sex), alpha = 0.3)
Warning: The following aesthetics were dropped during statistical transformation: fill.
ℹ This can happen when ggplot fails to infer the correct grouping structure in
  the data.
ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
  variable into a factor?
Warning: The following aesthetics were dropped during statistical transformation: fill.
ℹ This can happen when ggplot fails to infer the correct grouping structure in
  the data.
ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
  variable into a factor?

Show code
data.biph %>% 
  mutate(sex = ifelse(sex == 1, "f","m")) %>%
  ggplot() +
  geom_density(aes(x = length_mm, fill = factor(sex)), alpha = 0.5) +
  facet_wrap(~tot.age, scales = "free_y") +
  labs(caption = "length density by age") 
mutate: converted 'sex' from integer to character (0 new NA)
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_density()`).

Show code
# age at smoltification by spatial unit
data.biph %>%
  mutate(sex = ifelse(sex == 1, "f","m")) %>%
  filter(age.type == "both") %>%
  ggplot() +
  geom_density(aes(x = juv.age),  alpha = 0.3) +
  facet_wrap(~spat.unit) +
  labs(x = "age at smoltification") +
  xlim(-1, 5)
mutate: converted 'sex' from integer to character (0 new NA)
filter: removed 32,262 rows (41%), 47,297 rows remaining

Show code
# juvenile size at age
data.biph %>%
  filter(age.type == "juve.only") %>%
  ggplot(aes(x = juv.age, y =length_mm)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = lm) +
  facet_wrap(~spat.unit)
filter: removed 47,297 rows (59%), 32,262 rows remaining
`geom_smooth()` using formula = 'y ~ x'

Show code
data.biph %>%
  filter(age.type == "both") %>%
  mutate(sex = ifelse(sex == 1, "f","m")) %>%
  ggplot(aes(tot.age, length_mm, color = factor(sex))) +
  geom_point(alpha = 0.3) +
  facet_wrap(~spat.unit) +
  scale_x_continuous(breaks = seq(0, 13, 1), limit = c(0,NA) ) +
  theme_minimal()
filter: removed 32,262 rows (41%), 47,297 rows remaining
mutate: converted 'sex' from integer to character (0 new NA)

Source models / Load samples

Show code
laa.samples <- readRDS(paste0(home,"/models_salmon/samples/salaa_samples_2025-12-22.RData"))$samples

# # Or source model code and mcmc (load libs and data)
# t <- Sys.time()
# source(file = paste0(home,"/R/model devel/2-2_model_salmon_biphasic_v6.R"))
# Sys.time() - t # approx 12 hours with 12 000 NUTS mcmc samples

Traces and ‘potential scale reduction factor’

Show code
node.sub <- grep("sig.l", colnames(laa.samples[[1]]), value = TRUE)
laa.samples[, node.sub[], drop = FALSE] %>% mcmc_trace()

Show code
laa.samples[, node.sub[], drop = FALSE] %>% gelman.diag()
Potential scale reduction factors:

       Point est. Upper C.I.
sig.l           1          1
sig.lj          1          1

Multivariate psrf

1
Show code
laa.samples[, node.sub[], drop = FALSE] %>%
effectiveSize()
   sig.l   sig.lj 
2737.001 2241.881 
Show code
node.sub <- grep("^g\\[", colnames(laa.samples[[1]]), value = TRUE)
laa.samples[, node.sub[], drop = FALSE] %>% mcmc_trace()

Show code
laa.samples[, node.sub[], drop = FALSE] %>% gelman.diag()
Potential scale reduction factors:

      Point est. Upper C.I.
g[1]        1.00       1.00
g[2]        1.00       1.00
g[3]        1.00       1.00
g[4]        1.00       1.00
g[5]        1.01       1.02
g[6]        1.00       1.02
g[7]        1.00       1.00
g[8]        1.00       1.00
g[9]        1.00       1.00
g[10]       1.00       1.00
g[11]       1.01       1.04
g[12]       1.00       1.00

Multivariate psrf

1.01
Show code
print("effective sample size")
[1] "effective sample size"
Show code
laa.samples[, node.sub[], drop = FALSE] %>% effectiveSize()
    g[1]     g[2]     g[3]     g[4]     g[5]     g[6]     g[7]     g[8] 
2705.005 2274.933 3048.008 2628.006 2456.925 2402.404 1991.974 2690.824 
    g[9]    g[10]    g[11]    g[12] 
2535.784 2896.670 2737.364 2753.322 
Show code
node.sub <- grep("^k.year\\[", colnames(laa.samples[[1]]), value = TRUE)
laa.samples[, node.sub[sample(1:length(node.sub), 36)], drop = FALSE] %>% mcmc_trace()

Show code
pl <- laa.samples[, node.sub[], drop = FALSE] %>% gelman.diag()
pl$psrf[  which(pl$psrf[, 1] > 1.1),] 
     Point est. Upper C.I.
Show code
pl <- laa.samples[, node.sub[], drop = FALSE] %>%
effectiveSize()
print("effective sample size")
[1] "effective sample size"
Show code
pl[which(pl < 500)]
k.year[9, 1, 17] k.year[9, 1, 18] k.year[9, 1, 19] 
        465.7999         412.8642         486.5969 
Show code
node.sub <- grep("^linf\\[", colnames(laa.samples[[1]]), value = TRUE)
laa.samples[, node.sub[], drop = FALSE] %>% mcmc_trace()

Show code
laa.samples[, node.sub[], drop = FALSE] %>% gelman.diag()
Potential scale reduction factors:

            Point est. Upper C.I.
linf[1, 1]        1.00       1.00
linf[2, 1]        1.00       1.00
linf[3, 1]        1.00       1.00
linf[4, 1]        1.01       1.04
linf[5, 1]        1.01       1.07
linf[6, 1]        1.00       1.01
linf[7, 1]        1.00       1.00
linf[8, 1]        1.00       1.02
linf[9, 1]        1.00       1.02
linf[10, 1]       1.00       1.00
linf[11, 1]       1.00       1.00
linf[12, 1]       1.00       1.00
linf[1, 2]        1.00       1.00
linf[2, 2]        1.00       1.00
linf[3, 2]        1.00       1.00
linf[4, 2]        1.00       1.00
linf[5, 2]        1.00       1.00
linf[6, 2]        1.00       1.01
linf[7, 2]        1.00       1.01
linf[8, 2]        1.00       1.02
linf[9, 2]        1.00       1.01
linf[10, 2]       1.00       1.02
linf[11, 2]       1.00       1.00
linf[12, 2]       1.01       1.03

Multivariate psrf

1.04
Show code
print("effective sample size")
[1] "effective sample size"
Show code
laa.samples[, node.sub[], drop = FALSE] %>% effectiveSize()
 linf[1, 1]  linf[2, 1]  linf[3, 1]  linf[4, 1]  linf[5, 1]  linf[6, 1] 
   2022.670    3000.000    2380.157    2690.416    2377.354    2687.803 
 linf[7, 1]  linf[8, 1]  linf[9, 1] linf[10, 1] linf[11, 1] linf[12, 1] 
   2198.210    2708.590    1132.605    1876.298    2359.942    1417.662 
 linf[1, 2]  linf[2, 2]  linf[3, 2]  linf[4, 2]  linf[5, 2]  linf[6, 2] 
   2258.705    1224.626    1699.410    2841.389    2438.395    2552.862 
 linf[7, 2]  linf[8, 2]  linf[9, 2] linf[10, 2] linf[11, 2] linf[12, 2] 
   2328.636    1939.236    1729.165    1420.164    2238.529    1448.603 
Show code
node.sub <- grep("^lb.mu\\[", colnames(laa.samples[[1]]), value = TRUE)
laa.samples[, node.sub[], drop = FALSE] %>% mcmc_trace()

Show code
laa.samples[, node.sub[], drop = FALSE] %>% gelman.diag()
Potential scale reduction factors:

          Point est. Upper C.I.
lb.mu[1]           1       1.00
lb.mu[2]           1       1.00
lb.mu[3]           1       1.00
lb.mu[4]           1       1.00
lb.mu[5]           1       1.00
lb.mu[6]           1       1.00
lb.mu[7]           1       1.02
lb.mu[8]           1       1.02
lb.mu[9]           1       1.00
lb.mu[10]          1       1.00
lb.mu[11]          1       1.02
lb.mu[12]          1       1.00

Multivariate psrf

1.01
Show code
print("effective sample size")
[1] "effective sample size"
Show code
laa.samples[, node.sub[], drop = FALSE] %>% effectiveSize()
 lb.mu[1]  lb.mu[2]  lb.mu[3]  lb.mu[4]  lb.mu[5]  lb.mu[6]  lb.mu[7]  lb.mu[8] 
 3240.040  2653.986  3406.101  2615.826  2873.566  2623.409  2785.491  2888.338 
 lb.mu[9] lb.mu[10] lb.mu[11] lb.mu[12] 
 2773.759  2591.352  2627.348  2561.797 
Show code
node.sub <- grep("^R\\[", colnames(laa.samples[[1]]), value = TRUE) 
laa.samples[, node.sub[sample(1:length(node.sub), 36)], drop = FALSE] %>% mcmc_trace()

Show code
pl <- laa.samples[, node.sub[], drop = FALSE] %>% gelman.diag(multivariate = FALSE)
pl$psrf[  which(pl$psrf[, 1] > 1.1),] 
     Point est. Upper C.I.
Show code
pl <- laa.samples[, node.sub[], drop = FALSE] %>%
effectiveSize()
print("effective sample size")
[1] "effective sample size"
Show code
pl[which(pl < 500 & pl > 0)]
named numeric(0)

Predicted / observed

Show code
# Juvenile predictions
dfj <- laa.samples %>%
  spread_draws(l.mu.j[i], sep = ",") %>%
  median_qi() %>%
  rename(pred.l = l.mu.j,
         ind.id = i) %>%
  ungroup() %>%
  left_join(data.j %>%
              select(length_mm_j,spat.unit) %>%
              rename(obs.l = length_mm_j) %>%
              mutate(ind.id = row_number()), by = "ind.id")
rename: renamed 2 variables (ind.id, pred.l)
ungroup: no grouping variables remain
select: dropped 33 variables (sai_cou_code, year, site, origin, weight_g, …)
rename: renamed one variable (obs.l)
mutate: new variable 'ind.id' (integer) with 32,262 unique values and 0% NA
left_join: added 2 columns (obs.l, spat.unit)
           > rows only in x                               0
           > rows only in data.j %>% select(lengt.. (     0)
           > matched rows                            32,262
           >                                        ========
           > rows total                              32,262
Show code
dfj %>%
  ggplot() +
  geom_abline(slope = 1) +
  geom_point(aes(x = obs.l, y = pred.l),shape = 4,alpha = 0.5) +
  theme_minimal() +
  facet_wrap(~spat.unit) +
  labs(caption = "Juvenile length at age" )

Show code
ggsave(paste0(home, "/report/salaa_poj.png"), width = 17, height = 14, units = "cm")

# Sea age predictions
dfl <- data.b %>%
  select(length_mm,spat.unit,smo.age,hatch.year.f,sex,g.year,su) %>%
  rename(obs.l = length_mm) %>%
  left_join(
    laa.samples %>%
      spread_draws(l.mu[j,k,l,m,n], sep = ",") %>%
      median_qi() %>%  
      rename(pred.l = l.mu,
             su = j,
             smo.age = k,
             hatch.year.f = l,
             sex = m,
             g.year = n) %>%
      ungroup())
select: dropped 28 variables (sai_cou_code, year, site, origin, weight_g, …)
rename: renamed one variable (obs.l)
rename: renamed 6 variables (su, smo.age, hatch.year.f, sex, g.year, …)
ungroup: no grouping variables remain
Joining with `by = join_by(smo.age, hatch.year.f, sex, g.year, su)`
left_join: added 6 columns (pred.l, .lower, .upper, .width, .point, …)
           > rows only in x                               0
           > rows only in laa.samples %>% spread_.. (36,752)
           > matched rows                            47,297
           >                                        ========
           > rows total                              47,297
Show code
dfl %>%
  ggplot() +
  geom_abline(slope = 1) +
  geom_point(aes(x = obs.l, y = pred.l),shape = 4,alpha = 0.5) +
  theme_minimal() +
  facet_wrap(~spat.unit)  +
  labs(caption = "Sea length at age" )

Show code
ggsave(paste0(home, "/report/salaa_poa.png"), width = 17, height = 14, units = "cm")

rm(dfj,dfl)

Length at age variables

Show code
laa.samples %>%
  gather_draws(sig.l,sig.lj) %>%
  ggplot() +
  geom_density(aes(x = .value, color = .variable)) +
  facet_wrap(~.variable, scales = "free", ncol = 3) +
  theme_minimal() 

Show code
var.lats <- data.biph %>%
  mutate(m.lat = mean(lat), .by = spat.unit) %>%
  distinct(su, spat.unit, m.lat) %>%
  arrange(-m.lat)
mutate: new variable 'm.lat' (double) with 12 unique values and 0% NA
distinct: removed 79,547 rows (>99%), 12 rows remaining
Show code
su.labs <- var.lats %>%
  select(su, spat.unit) %>%
  { setNames(.$spat.unit, .$su) }
select: dropped one variable (m.lat)
Show code
laa.samples %>%
  gather_draws(lb.mu[su]) %>%
  ungroup() %>%
  left_join(data.biph %>% distinct(su,spat.unit)) %>%
  left_join(var.lats) %>%
  #filter(!spat.unit %in% c("Adour","Baltic.sea:AU-NA")) %>%
  mutate(r = reorder(su, m.lat)) %>%
  ggplot() +
  geom_boxplot(aes(x = .value, y = r),
               outliers = FALSE) +
  theme_minimal() +
  scale_y_discrete(labels = su.labs) +
  labs(y = "", x = "L_b")
ungroup: no grouping variables remain
distinct: removed 79,547 rows (>99%), 12 rows remaining
Joining with `by = join_by(su)`
left_join: added one column (spat.unit)
           > rows only in x                               0
           > rows only in data.biph %>% distinct(.. (     0)
           > matched rows                            36,000
           >                                        ========
           > rows total                              36,000
Joining with `by = join_by(su, spat.unit)`
left_join: added one column (m.lat)
           > rows only in x              0
           > rows only in var.lats (     0)
           > matched rows           36,000
           >                       ========
           > rows total             36,000
mutate: new variable 'r' (factor) with 12 unique values and 0% NA

Show code
ggsave(paste0(home, "/report/salaa_lb.png"), width = 17, height = 14, units = "cm")

laa.samples %>%
  gather_draws(g[su]) %>%
  ungroup() %>%
  left_join(data.biph %>% distinct(su,spat.unit)) %>%
  left_join(var.lats) %>%
  #filter(!spat.unit %in% c("Adour","Baltic.sea:AU-NA")) %>%
  mutate(r = reorder(su, m.lat)) %>%
  ggplot() +
  geom_boxplot(aes(x = exp(.value), y = r),
               outliers = FALSE) +
  theme_minimal() +
  scale_y_discrete(labels = su.labs) +
  labs(y = "", x = "r_j")
ungroup: no grouping variables remain
distinct: removed 79,547 rows (>99%), 12 rows remaining
Joining with `by = join_by(su)`
left_join: added one column (spat.unit)
           > rows only in x                               0
           > rows only in data.biph %>% distinct(.. (     0)
           > matched rows                            36,000
           >                                        ========
           > rows total                              36,000
Joining with `by = join_by(su, spat.unit)`
left_join: added one column (m.lat)
           > rows only in x              0
           > rows only in var.lats (     0)
           > matched rows           36,000
           >                       ========
           > rows total             36,000
mutate: new variable 'r' (factor) with 12 unique values and 0% NA

Show code
ggsave(paste0(home, "/report/salaa_r.png"), width = 17, height = 14, units = "cm")

laa.samples %>%
  gather_draws(k.year[su,sex,year], sep = ",") %>%
  ungroup() %>%
  left_join(data.biph %>% distinct(su,spat.unit)) %>%
  left_join(var.lats) %>%
  mutate(sex = ifelse(sex == 1, "f","m"),
         r = reorder(su, m.lat)) %>%
  ggplot() +
  geom_boxplot(aes(x = exp(.value), y = r, color = sex),
               outliers = FALSE) +
  theme_minimal() +
  coord_cartesian(xlim = c(-0.5,4)) +
  scale_y_discrete(labels = su.labs) +
  labs(y = "", x = "k")
ungroup: no grouping variables remain
distinct: removed 79,547 rows (>99%), 12 rows remaining
Joining with `by = join_by(su)`
left_join: added one column (spat.unit)
           > rows only in x                                  0
           > rows only in data.biph %>% distinct(.. (        0)
           > matched rows                            2,016,000
           >                                        ===========
           > rows total                              2,016,000
Joining with `by = join_by(su, spat.unit)`
left_join: added one column (m.lat)
           > rows only in x                 0
           > rows only in var.lats (        0)
           > matched rows           2,016,000
           >                       ===========
           > rows total             2,016,000
mutate: converted 'sex' from integer to character (0 new NA)
        new variable 'r' (factor) with 12 unique values and 0% NA

Show code
laa.samples %>%
  gather_draws(linf[su,sex], sep = ",") %>%
  ungroup() %>%
  left_join(data.biph %>% distinct(su,spat.unit)) %>%
  left_join(var.lats) %>%
  mutate(sex = ifelse(sex == 1, "f","m"),
         r = reorder(su, m.lat)) %>%
  ggplot() +
  geom_boxplot(aes(x = exp(.value), y = r, color = sex),
               outliers = FALSE) +
  scale_y_discrete(labels = su.labs) +
  theme_minimal() +
  labs(y = "", x = expression(L[infinity]))
ungroup: no grouping variables remain
distinct: removed 79,547 rows (>99%), 12 rows remaining
Joining with `by = join_by(su)`
left_join: added one column (spat.unit)
           > rows only in x                               0
           > rows only in data.biph %>% distinct(.. (     0)
           > matched rows                            72,000
           >                                        ========
           > rows total                              72,000
Joining with `by = join_by(su, spat.unit)`
left_join: added one column (m.lat)
           > rows only in x              0
           > rows only in var.lats (     0)
           > matched rows           72,000
           >                       ========
           > rows total             72,000
mutate: converted 'sex' from integer to character (0 new NA)
        new variable 'r' (factor) with 12 unique values and 0% NA

Show code
ggsave(paste0(home, "/report/salaa_linf.png"), width = 17, height = 14, units = "cm")

Predicted length at age curves

Show code
gc()
           used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
Ncells  3501120 187.0    5391878 288.0         NA  5391878 288.0
Vcells 12784369  97.6   24743782 188.8     102400 20553061 156.9
Show code
# based on l.mu (age.type = "both") 
dfp <- laa.samples[[1]][sample(1:1500,1000), ] %>% # sample from one chain to reduce object size
  spread_draws(l.mu[su,smo.age,coh,sex,gy], sep = ",") %>%
  ungroup() 
ungroup: no grouping variables remain
Show code
masu <- data.biph %>%
  summarise(ma = max(g.year), .by = su) %>%
  arrange(su)
summarise: now 12 rows and 2 columns, ungrouped
Show code
smcsu <- data.biph %>%
  distinct(su, hatch.year.f, smo.age, sex) %>%
  rename(coh = hatch.year.f) 
distinct: removed 78,331 rows (98%), 1,228 rows remaining
rename: renamed one variable (coh)
Show code
dfp %>%
  filter(gy <= masu[su,2])  %>%
  semi_join(smcsu, by = c("su", "smo.age", "coh", "sex")) %>%
  filter(#sex == 1, #females
         #coh %in% c(2,8,16)
         ) %>%
  ungroup() %>%
  left_join(data.biph %>% distinct(su, spat.unit)) %>%
  ggplot() +
  geom_boxplot(aes(x = factor(gy), y = l.mu, color = factor(sex)), 
               outliers = FALSE) +
  facet_wrap(~spat.unit, scales = "free") +
  theme_minimal() +
  labs(title = "females")
Warning: Using one column matrices in `filter()` was deprecated in dplyr 1.1.0.
ℹ Please use one dimensional logical vectors instead.
ℹ The deprecated feature was likely used in the tidylog package.
  Please report the issue at <https://github.com/elbersb/tidylog/issues>.
filter: removed 14,250,000 rows (37%), 24,750,000 rows remaining
semi_join: added no columns
           > rows only in x     (16,101,000)
           > rows only in smcsu (       259)
           > matched rows         8,649,000
           >                    ============
           > rows total           8,649,000
filter: no rows removed
ungroup: no grouping variables remain
distinct: removed 79,547 rows (>99%), 12 rows remaining
Joining with `by = join_by(su)`
left_join: added one column (spat.unit)
           > rows only in x                                  0
           > rows only in data.biph %>% distinct(.. (        0)
           > matched rows                            8,649,000
           >                                        ===========
           > rows total                              8,649,000
Warning: Removed 722 rows containing non-finite outside the scale range
(`stat_boxplot()`).

Show code
ggsave(paste0(home, "/report/salaa_cur.png"), width = 17, height = 14, units = "cm")
Warning: Removed 722 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Show code
dfp %>%
  filter(gy <= masu[su,2]) %>%
  semi_join(smcsu, by = c("su", "smo.age", "coh", "sex")) %>%
  filter(sex == 1,
         smo.age == c(1:3)) %>% #females
  summarise(ml = median(l.mu), .by = c(su, coh, gy, smo.age)) %>%
  rename(hatch.year.f = coh) %>%
  left_join(data.biph %>% distinct(su, spat.unit)) %>%
  left_join(data.biph %>% distinct(hatch.year.f, hatch.year)) %>%
  filter(!ml == 0) %>%
  ggplot() +
  geom_line(aes(x = gy, y = ml, group = interaction(hatch.year, smo.age)), color = "grey") +
  geom_point(data = data.biph %>% 
               filter(sex == 1,
                      #hatch.year.f %in% c(1,5,10,15,20,25)
                      ),
             aes(x = g.year, y = length_mm), size = 0.3, alpha = 0.1 ) +
  facet_wrap(~spat.unit) +
  theme_minimal() +
  labs(title = "females")
filter: removed 14,250,000 rows (37%), 24,750,000 rows remaining
semi_join: added no columns
           > rows only in x     (16,101,000)
           > rows only in smcsu (       259)
           > matched rows         8,649,000
           >                    ============
           > rows total           8,649,000
filter: removed 7,468,672 rows (86%), 1,180,328 rows remaining
summarise: now 3,541 rows and 5 columns, ungrouped
rename: renamed one variable (hatch.year.f)
distinct: removed 79,547 rows (>99%), 12 rows remaining
Joining with `by = join_by(su)`
left_join: added one column (spat.unit)
           > rows only in x                              0
           > rows only in data.biph %>% distinct(.. (    0)
           > matched rows                            3,541
           >                                        =======
           > rows total                              3,541
distinct: removed 79,532 rows (>99%), 27 rows remaining
Joining with `by = join_by(hatch.year.f)`
left_join: added one column (hatch.year)
           > rows only in x                              0
           > rows only in data.biph %>% distinct(.. (    2)
           > matched rows                            3,541
           >                                        =======
           > rows total                              3,541
filter: removed 255 rows (7%), 3,286 rows remaining
filter: removed 49,976 rows (63%), 29,583 rows remaining

Show code
dfp %>%
  filter(gy <= masu[su,2]) %>%
  semi_join(smcsu, by = c("su", "smo.age", "coh", "sex")) %>%
  filter(sex == 2,
         smo.age == c(1:3)) %>% #females
  summarise(ml = median(l.mu), .by = c(su, coh, gy, smo.age)) %>%
  rename(hatch.year.f = coh) %>%
  left_join(data.biph %>% distinct(su, spat.unit)) %>%
  left_join(data.biph %>% distinct(hatch.year.f, hatch.year)) %>%
  filter(!ml == 0) %>%
  ggplot() +
  geom_line(aes(x = gy, y = ml, group = interaction(hatch.year, smo.age)), color = "grey") +
  geom_point(data = data.biph %>% 
               filter(sex == 2,
                      #hatch.year.f %in% c(1,5,10,15,20,25)
                      ),
             aes(x = g.year, y = length_mm), size = 0.3, alpha = 0.1 ) +
  facet_wrap(~spat.unit) +
  theme_minimal() +
  labs(title = "males")
filter: removed 14,250,000 rows (37%), 24,750,000 rows remaining
semi_join: added no columns
           > rows only in x     (16,101,000)
           > rows only in smcsu (       259)
           > matched rows         8,649,000
           >                    ============
           > rows total           8,649,000
filter: removed 7,481,329 rows (86%), 1,167,671 rows remaining
summarise: now 3,503 rows and 5 columns, ungrouped
rename: renamed one variable (hatch.year.f)
distinct: removed 79,547 rows (>99%), 12 rows remaining
Joining with `by = join_by(su)`
left_join: added one column (spat.unit)
           > rows only in x                              0
           > rows only in data.biph %>% distinct(.. (    0)
           > matched rows                            3,503
           >                                        =======
           > rows total                              3,503
distinct: removed 79,532 rows (>99%), 27 rows remaining
Joining with `by = join_by(hatch.year.f)`
left_join: added one column (hatch.year)
           > rows only in x                              0
           > rows only in data.biph %>% distinct(.. (    2)
           > matched rows                            3,503
           >                                        =======
           > rows total                              3,503
filter: removed 247 rows (7%), 3,256 rows remaining
filter: removed 55,340 rows (70%), 24,219 rows remaining

k over time

Show code
tmpK <- laa.samples %>%
  gather_draws(k.year[su, sex, coh], sep = ",") %>%
  ungroup() %>%
  left_join(data.b %>% distinct(su, spat.unit)) %>% 
  mutate(year = coh + 1998,
         sex = ifelse(sex == 1, "m","f")) %>%
  mutate(m_length = median(.value), .by = c(year,sex)) %>%
  filter(year < 2026)
ungroup: no grouping variables remain
distinct: removed 47,285 rows (>99%), 12 rows remaining
Joining with `by = join_by(su)`
left_join: added one column (spat.unit)
           > rows only in x                                  0
           > rows only in data.b %>% distinct(su,.. (        0)
           > matched rows                            2,016,000
           >                                        ===========
           > rows total                              2,016,000
mutate: converted 'sex' from integer to character (0 new NA)
        new variable 'year' (double) with 28 unique values and 0% NA
mutate: new variable 'm_length' (double) with 56 unique values and 0% NA
filter: removed 72,000 rows (4%), 1,944,000 rows remaining
Show code
tmpK %>%
  ggplot() +
  geom_boxplot(aes(x = factor(year), y = exp(.value), color = factor(sex)),
               outliers = FALSE) +
  facet_wrap(~sex) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90,hjust = 1, size = rel(.75))) +
  labs(x = "year", y = "k")

Show code
tmpK %>%
  filter(sex == "m",
         !spat.unit == "Ireland_west"
         ) %>%
  ungroup() %>%
  ggplot() +
  geom_boxplot(aes(x = year, y = exp(.value), group = year), color = "#00BFC4", outliers = FALSE) +
  geom_line(aes(y = exp(m_length), x = year), color = "#00BFC4", alpha = 0.4) +
  facet_wrap(~spat.unit, scales = "free") + 
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90,hjust = 1, size = rel(.75))) +
  labs(x = "year", y = "k", title = "males", caption = "line is global median by year and sex")
filter: removed 1,053,000 rows (54%), 891,000 rows remaining
ungroup: no grouping variables remain

Show code
ggsave(paste0(home, "/report/salaa_km.png"), width = 17, height = 14, units = "cm")
  
tmpK %>%
  filter(sex == "f",
         !spat.unit == "Ireland_west"
         ) %>%
  ungroup() %>%
  ggplot() +
  geom_boxplot(aes(x = year, y = exp(.value), group = year), color = "#F8766D", outliers = FALSE) +
  geom_line(aes(y = exp(m_length), x = year), color = "#F8766D", alpha = 0.5) +
  facet_wrap(~spat.unit, scales = "free") + 
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90,hjust = 1, size = rel(.75))) +
  labs(x = "year", y = "k", title = "females", caption = "line is global median by year and sex")
filter: removed 1,053,000 rows (54%), 891,000 rows remaining
ungroup: no grouping variables remain

Show code
ggsave(paste0(home, "/report/salaa_kf.png"), width = 17, height = 14, units = "cm")

tmpK %>%
  filter(spat.unit == "Ireland_west") %>%
  ungroup() %>%
  ggplot() +
  geom_boxplot(aes(x = year, y = exp(.value), group = year, color = sex), outliers = FALSE) +
  geom_line(aes(y = exp(m_length), x = year, color = sex), alpha = 0.5) +
  facet_wrap(~sex, scales = "free") + 
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90,hjust = 1, size = rel(.75))) +
  labs(x = "year", y = "k", title = "Ireland_west", caption = "line is global median by year and sex")
filter: removed 1,782,000 rows (92%), 162,000 rows remaining
ungroup: no grouping variables remain

Show code
ggsave(paste0(home, "/report/salaa_kie.png"), width = 17, height = 14, units = "cm")

Correlation/covariance matrix

Show code
# median sample correlation 
samp <- laa.samples[[1]]

cols <- grep("^R",colnames(as.matrix(samp)))


corr_arr <- array(NA, dim = c(12, 12, nrow(samp)))

for (i in 1:nrow(samp)) {

  R_sample <- matrix(
    as.numeric(samp[i, cols]),
    nrow = 12,
    ncol = 12,
    byrow = FALSE
  )

  corr_arr[, , i] <- crossprod(R_sample)
}

corr <- apply(corr_arr, c(1, 2), median)

var.lats <- data.biph %>%
  mutate(m.lat = mean(lat), .by = spat.unit) %>%
  distinct(su, spat.unit, m.lat) %>%
  arrange(-m.lat)
mutate: new variable 'm.lat' (double) with 12 unique values and 0% NA
distinct: removed 79,547 rows (>99%), 12 rows remaining
Show code
su.labs <- var.lats %>%
  select(su, spat.unit) %>%
  { setNames(.$spat.unit, .$su) }
select: dropped one variable (m.lat)
Show code
as.data.frame(corr) %>%
  mutate(r = row_number()) %>%
  pivot_longer(cols = -r, names_to = "c", values_to = "value") %>%
  mutate(c = as.integer(gsub("V", "", c)) ) %>%
  left_join(var.lats, by = c("r" = "su")) %>%
  left_join(var.lats, by = c("c" = "su"), suffix = c(".x", ".y")) %>%
  mutate(r = reorder(r, m.lat.x),
         c = reorder(c, -m.lat.y)) %>%
  ggplot(aes(x=c, y=r, fill = value)) + 
  geom_tile() +
  scale_fill_viridis_c() +
  scale_x_discrete(labels = su.labs) +
  scale_y_discrete(labels = su.labs) +
  theme(axis.text = element_text(angle = 45, hjust = 1, size = rel(.75)))
mutate: new variable 'r' (integer) with 12 unique values and 0% NA
pivot_longer: reorganized (V1, V2, V3, V4, V5, …) into (c, value) [was 12x13, now 144x3]
mutate: converted 'c' from character to integer (0 new NA)
left_join: added 2 columns (spat.unit, m.lat)
           > rows only in x           0
           > rows only in var.lats (  0)
           > matched rows           144
           >                       =====
           > rows total             144
left_join: added 4 columns (spat.unit.x, m.lat.x, spat.unit.y, m.lat.y)
           > rows only in x           0
           > rows only in var.lats (  0)
           > matched rows           144
           >                       =====
           > rows total             144
mutate: converted 'r' from integer to factor (0 new NA)
        converted 'c' from integer to factor (0 new NA)